해당 자료는 전북대학교 이영미 교수님 2023고급시계열분석 자료임
library (lubridate)
library (ggplot2)
library (car)
#library(forecast)
library (lmtest)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
Loading required package: carData
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
1
z <- c (52 ,46 ,46 ,52 ,50 ,50 ,48 ,45 ,41 ,53 )
n <- length (z)
(3)
sum ((z- mean (z))^ 2 )
s2 = sum ((z- mean (z))^ 2 )/ (n-1 )
s2
130.1
14.4555555555556
bar_z + qt (0.975 ,(n-1 )) * sqrt ((1 + 1 / n)* s2)
bar_z - qt (0.975 ,(n-1 )) * sqrt ((1 + 1 / n)* s2)
57.3206225347423
39.2793774652577
-
lm함수 비교
Call:
lm(formula = z ~ 1)
Residuals:
Min 1Q Median 3Q Max
-7.3 -2.3 0.7 3.2 4.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 48.300 1.202 40.17 1.83e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.802 on 9 degrees of freedom
predict (m, newdata= data.frame (t= 11 : 15 ), interval= "prediction" )
A matrix: 5 × 3 of type dbl
1
48.3
39.27938
57.32062
2
48.3
39.27938
57.32062
3
48.3
39.27938
57.32062
4
48.3
39.27938
57.32062
5
48.3
39.27938
57.32062
2
(3)
z <- c (303 ,298 ,303 ,314 ,303 ,314 ,310 ,324 ,317 ,327 ,323 ,324 ,331 ,330 ,332 )
n <- length (z)
hat_beta_0 = 2 * (2 * n+ 1 )/ n/ (n-1 ) * sum (z) - 6 / n/ (n-1 )* sum (t* z)
hat_beta_0
hat_beta_1 = 12 / n/ (n^ 2-1 )* sum (t* z) - 6 / n/ (n-1 )* sum (z)
hat_beta_1
297.780952380952
2.3857142857143
(4)
\(\hat Z_n(l) = \hat \beta_0 + \hat \beta_1(n+l)\)
hat_beta_0 + hat_beta_1 * (15 + (1 : 5 ))
335.952380952381 338.338095238095 340.72380952381 343.109523809524 345.495238095238
-
lm비교
Call:
lm(formula = z ~ t)
Residuals:
Min 1Q Median 3Q Max
-6.710 -2.331 -1.181 2.519 7.133
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 297.781 2.364 125.964 < 2e-16 ***
t 2.386 0.260 9.176 4.84e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.351 on 13 degrees of freedom
Multiple R-squared: 0.8662, Adjusted R-squared: 0.856
F-statistic: 84.19 on 1 and 13 DF, p-value: 4.836e-07
predict (m, newdata = data.frame (t= 15 + (1 : 5 )), interval= "prediction" )
A matrix: 5 × 3 of type dbl
1
335.9524
325.2553
346.6495
2
338.3381
327.3932
349.2830
3
340.7238
329.5084
351.9393
4
343.1095
331.6025
354.6166
5
345.4952
333.6771
357.3134
3
note: 모의시계열 표본평균과 표본분산을 안구했었네!;
t <- 1 : 100
####### (1)
Zt_1 <- 100 + rnorm (100 ,0 ,1 )
####### (2)
Zt_2 <- 500 + rnorm (100 ,0 ,1 )
####### (3)
Zt_3 <- 100 + rnorm (100 ,0 ,10 )
####### (4)
Zt_4 <- 100 + t * rnorm (100 ,0 ,1 )
dt <- data.frame (
Zt_1 = Zt_1,
Zt_2 = Zt_2,
Zt_3 = Zt_3,
Zt_4 = Zt_4)
round (colMeans (dt),2 )
round (apply (dt,2 ,var),2 )
Zt_1 100.04 Zt_2 499.98 Zt_3 100.45 Zt_4 95.8
Zt_1 1.05 Zt_2 0.84 Zt_3 89.68 Zt_4 2449.71
ts.plot (ts (Zt_1), ts (Zt_2), ts (Zt_3), ts (Zt_4),
lwd= 2 ,
col= 1 : 4 )
legend (c (0 ,450 ), legend = c (expression (Z[t1]), expression (Z[t2]), expression (Z[t3]),expression (Z[t4]),
col= 1 : 4 , bty= 'n' , lty= 1 , lwd= 2 )
ERROR: Error in parse(text = x, srcfile = src): <text>:6:0: unexpected end of input
4: legend(c(0,450), legend = c(expression(Z[t1]), expression(Z[t2]), expression(Z[t3]),expression(Z[t4]),
5: col= 1:4, bty='n', lty=1, lwd=2)
^
4
###### (1)
Zt_1 <- 100 + rnorm (100 ,0 ,1 )
####### (2)
Zt_2 <- 100 + t + rnorm (100 ,0 ,1 )
####### (3)
Zt_3 <- 100 + 0.3 * t + sin (2 * pi* t/ 12 ) + cos (2 * pi* t/ 12 ) + rnorm (100 ,0 ,1 )
Warning message in 100 + t + rnorm(100, 0, 1):
“longer object length is not a multiple of shorter object length”
Warning message in 100 + 0.3 * t + sin(2 * pi * t/12) + cos(2 * pi * t/12) + rnorm(100, :
“longer object length is not a multiple of shorter object length”
ts.plot (ts (Zt_1), ts (Zt_2), ts (Zt_3),col= 1 : 3 , lwd= 2 )
legend ("topleft" , legend = c (expression (Z[t1]), expression (Z[t2]), expression (Z[
col= 1 : 3 , lty= 1 , lwd= 2 )
Zt_3 <- 100 + 0.3 * t + 3 * sin (2 * pi* t/ 12 ) + 20 * cos (2 * pi* t/ 12 ) + rnorm (100 ,0 ,1 )
ts.plot (ts (Zt_1), ts (Zt_2), ts (Zt_3),col= 1 : 3 )
legend ("topleft" , legend = c (expression (Z[t1]), expression (Z[t2]), expression (Z[
col= 1 : 3 , lty= 1 )
5
z <- scan ("book.txt" )
plot.ts (z)
(3)
\(Z_t = \beta_0 + \beta_1 t + \epsilon_t\)
t <- 1 : length (z)
m <- lm (z~ t)
summary (m)
Call:
lm(formula = z ~ t)
Residuals:
Min 1Q Median 3Q Max
-2.5563 -1.0063 -0.2081 1.0385 2.0644
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.19080 0.46960 17.44 <2e-16 ***
t 3.07586 0.02645 116.28 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.254 on 28 degrees of freedom
Multiple R-squared: 0.9979, Adjusted R-squared: 0.9979
F-statistic: 1.352e+04 on 1 and 28 DF, p-value: < 2.2e-16
(4)
\(\hat Z_{n+l} = \hat Z_n(l) = \hat \beta_0 + \hat \beta_1(n+l)\)
new_dt <- data.frame (t= 30 + (1 : 12 ))
predict (m, new_dt)
1 103.542528735632 2 106.618390804598 3 109.694252873563 4 112.770114942529 5 115.845977011494 6 118.92183908046 7 121.997701149425 8 125.073563218391 9 128.149425287356 10 131.225287356322 11 134.301149425287 12 137.377011494253
pred <- predict (m, new_dt, interval= "prediction" )
pred
A matrix: 12 × 3 of type dbl
1
103.5425
100.7996
106.2855
2
106.6184
103.8584
109.3784
3
109.6943
106.9162
112.4723
4
112.7701
109.9731
115.5671
5
115.8460
113.0291
118.6629
6
118.9218
116.0842
121.7595
7
121.9977
119.1384
124.8570
8
125.0736
122.1918
127.9554
9
128.1494
125.2443
131.0546
10
131.2253
128.2960
134.1546
11
134.3011
131.3469
137.2554
12
137.3770
134.3970
140.3570
plot (t, z, type= 'n' , ylim = c (0 , 145 ), xlim= c (1 ,(30 + 12 )))
lines (t, z, lwd= 2 )
lines (30 + (1 : 12 ), pred[,1 ], col= 2 , lwd= 3 )
lines (30 + (1 : 12 ), pred[,2 ], col= 3 , lwd= 3 , lty= 2 )
lines (30 + (1 : 12 ), pred[,3 ], col= 3 , lwd= 3 , lty= 2 )
6
z <- scan ("export.txt" )
n <- length (z)
n
86
z_ts <- ts (z, frequency= 12 )
cycle (z_ts)
A Time Series: 8 × 12
1
1
2
3
4
5
6
7
8
9
10
11
12
2
1
2
3
4
5
6
7
8
9
10
11
12
3
1
2
3
4
5
6
7
8
9
10
11
12
4
1
2
3
4
5
6
7
8
9
10
11
12
5
1
2
3
4
5
6
7
8
9
10
11
12
6
1
2
3
4
5
6
7
8
9
10
11
12
7
1
2
3
4
5
6
7
8
9
10
11
12
8
1
2
추세성분, 계절성분(주기=12), 불규칙 성분으로 구성되어 있음
계절추세모형
\(Z_t = \beta_0+ \beta_1t + \sum_{i=1}^12 \delta_i \times I_{t1} + \epsilon_t\) , 단 \(\delta_1=0\)
note: 나는 로그변환 한 값을 넣었음
z_ts <- ts (z, frequency= 12 )
cycle (z_ts)
A Time Series: 8 × 12
1
1
2
3
4
5
6
7
8
9
10
11
12
2
1
2
3
4
5
6
7
8
9
10
11
12
3
1
2
3
4
5
6
7
8
9
10
11
12
4
1
2
3
4
5
6
7
8
9
10
11
12
5
1
2
3
4
5
6
7
8
9
10
11
12
6
1
2
3
4
5
6
7
8
9
10
11
12
7
1
2
3
4
5
6
7
8
9
10
11
12
8
1
2
seasonal_I <- as.factor (cycle (z_ts))
t <- 1 : n
m2 <- lm (z~ t+ seasonal_I)
Call:
lm(formula = z ~ t + seasonal_I)
Residuals:
Min 1Q Median 3Q Max
-10.8562 -2.2938 0.1567 2.6730 9.3951
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.98000 1.73500 12.669 < 2e-16 ***
t 0.43721 0.01893 23.097 < 2e-16 ***
seasonal_I2 1.71779 2.16697 0.793 0.430512
seasonal_I3 9.21741 2.24422 4.107 0.000103 ***
seasonal_I4 7.37163 2.24366 3.286 0.001566 **
seasonal_I5 9.30299 2.24326 4.147 8.98e-05 ***
seasonal_I6 12.96578 2.24302 5.780 1.72e-07 ***
seasonal_I7 9.16286 2.24294 4.085 0.000112 ***
seasonal_I8 7.73422 2.24302 3.448 0.000941 ***
seasonal_I9 11.07272 2.24326 4.936 4.88e-06 ***
seasonal_I10 10.68409 2.24366 4.762 9.47e-06 ***
seasonal_I11 12.37545 2.24422 5.514 5.03e-07 ***
seasonal_I12 18.57967 2.24494 8.276 4.26e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.334 on 73 degrees of freedom
Multiple R-squared: 0.9011, Adjusted R-squared: 0.8848
F-statistic: 55.4 on 12 and 73 DF, p-value: < 2.2e-16
적합된 모형: \(\hat Z_t = 21.98 + 0.437t + 1.72 I_{t2} + 9.22 I_{t3} + \dots 18.58 I_{t12}\)
(4) 모형설명(제약조건 주의!)
1개월이 지날 때마다 월별수출액은 평균적으로 0.437억$증가
2월은 1월에 비해 월별수출액이 1.72억$ 증가
3월은 1월에 비해 월별수출액이 9.22억$ 증가
…
12월은 1월에 비해 평균 월별수출액이 18.58억$ 증가
(5)
\(\hat Z_{n+l} = \hat Z_n(l) = \hat \beta_0 + \hat \beta_1(n+l) + \sum_{i=1}^{12} \delta_i I_{t1}\)
new_dt <- data.frame ( t = n + (1 : 12 ),
seasonal_I = as.factor (c (3 : 12 ,1 ,2 )))
new_dt
A data.frame: 12 × 2
<int>
<fct>
87
3
88
4
89
5
90
6
91
7
92
8
93
9
94
10
95
11
96
12
97
1
98
2
(6) 95% 예측 구간
1 69.2346153846154 2 67.8260439560439 3 70.1946153846154 4 74.2946153846154 5 70.9289010989011 6 69.9374725274725 7 73.7131868131868 8 73.7617582417582 9 75.8903296703297 10 82.5317582417582 11 64.3892994505495 12 66.5442994505494
pred <- predict (m2, new_dt, interval = 'prediction' )
pred
A matrix: 12 × 3 of type dbl
1
69.23462
59.82517
78.64407
2
67.82604
58.41659
77.23549
3
70.19462
60.78517
79.60407
4
74.29462
64.88517
83.70407
5
70.92890
61.51945
80.33835
6
69.93747
60.52802
79.34692
7
73.71319
64.30374
83.12264
8
73.76176
64.35231
83.17121
9
75.89033
66.48088
85.29978
10
82.53176
73.12231
91.94121
11
64.38930
55.00439
73.77421
12
66.54430
57.15939
75.92921
69.23462
= \(\hat Z_{87} = \hat Z_{86}(1) = \hat \beta_0 + \hat \beta_1 \times 87 + \delta_3 \times I_{87 \times 3}\) (3월)
70.92890
= $ Z_{86}(5) = _0 + _1 (86+5) + 7 I {91 }$ (7월)
(7)
min_ <- min (c (z, pred[,2 ]))
max_ <- max (c (z, pred[,3 ]))
plot (t, z, type= 'n' , ylim = c (min_, max_), xlim= c (1 ,(n+ 12 )))
lines (t, z, lwd= 2 )
lines (n+ (1 : 12 ), pred[,1 ], col= 2 , lwd= 3 )
lines (n+ (1 : 12 ), pred[,2 ], col= 3 , lwd= 3 , lty= 2 )
lines (n+ (1 : 12 ), pred[,3 ], col= 3 , lwd= 3 , lty= 2 )
(번외) 잔차검정
resid_m <- resid (m2)
plot (resid_m, pch= 16 , type= 'l' )
abline (h= 0 , lty= 2 )
One Sample t-test
data: resid_m
t = 2.6111e-16, df = 85, p-value = 1
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
-0.861081 0.861081
sample estimates:
mean of x
1.130835e-16
studentized Breusch-Pagan test
data: m2
BP = 18.466, df = 12, p-value = 0.1023
lmtest:: dwtest (m2, alternative= "two.sided" )
Durbin-Watson test
data: m2
DW = 0.79196, p-value = 5.014e-09
alternative hypothesis: true autocorrelation is not 0
다항추세 사용
m3 <- lm (z ~ t + I (t^ 2 ) + seasonal_I)
summary (m3)
Call:
lm(formula = z ~ t + I(t^2) + seasonal_I)
Residuals:
Min 1Q Median 3Q Max
-9.2252 -2.1156 0.0414 2.2895 7.5664
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 17.3011107 1.6527443 10.468 4.12e-16 ***
t 0.7955399 0.0637685 12.475 < 2e-16 ***
I(t^2) -0.0041187 0.0007103 -5.799 1.65e-07 ***
seasonal_I2 1.7177908 1.8014987 0.954 0.343510
seasonal_I3 8.5584096 1.8691776 4.579 1.91e-05 ***
seasonal_I4 6.6796790 1.8690680 3.574 0.000633 ***
seasonal_I5 8.5863287 1.8690137 4.594 1.81e-05 ***
seasonal_I6 12.2326445 1.8690050 6.545 7.54e-09 ***
seasonal_I7 8.4214835 1.8690354 4.506 2.50e-05 ***
seasonal_I8 6.9928457 1.8691016 3.741 0.000365 ***
seasonal_I9 10.3395882 1.8692037 5.532 4.84e-07 ***
seasonal_I10 9.9674253 1.8693449 5.332 1.07e-06 ***
seasonal_I11 11.6835000 1.8695317 6.249 2.59e-08 ***
seasonal_I12 17.9206692 1.8697737 9.584 1.72e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.603 on 72 degrees of freedom
Multiple R-squared: 0.9326, Adjusted R-squared: 0.9204
F-statistic: 76.58 on 13 and 72 DF, p-value: < 2.2e-16
Call:
lm(formula = z ~ t + seasonal_I)
Residuals:
Min 1Q Median 3Q Max
-10.8562 -2.2938 0.1567 2.6730 9.3951
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.98000 1.73500 12.669 < 2e-16 ***
t 0.43721 0.01893 23.097 < 2e-16 ***
seasonal_I2 1.71779 2.16697 0.793 0.430512
seasonal_I3 9.21741 2.24422 4.107 0.000103 ***
seasonal_I4 7.37163 2.24366 3.286 0.001566 **
seasonal_I5 9.30299 2.24326 4.147 8.98e-05 ***
seasonal_I6 12.96578 2.24302 5.780 1.72e-07 ***
seasonal_I7 9.16286 2.24294 4.085 0.000112 ***
seasonal_I8 7.73422 2.24302 3.448 0.000941 ***
seasonal_I9 11.07272 2.24326 4.936 4.88e-06 ***
seasonal_I10 10.68409 2.24366 4.762 9.47e-06 ***
seasonal_I11 12.37545 2.24422 5.514 5.03e-07 ***
seasonal_I12 18.57967 2.24494 8.276 4.26e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.334 on 73 degrees of freedom
Multiple R-squared: 0.9011, Adjusted R-squared: 0.8848
F-statistic: 55.4 on 12 and 73 DF, p-value: < 2.2e-16
2차 추세도 유의하고, 수정된 결정계수의 값이 증가 하였기 때문에 2차 추세 모형 사용 가능
plot.ts (resid_m3)
abline (h= 0 , lty= 2 )
One Sample t-test
data: resid_m3
t = -4.2693e-16, df = 85, p-value = 1
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
-0.7109349 0.7109349
sample estimates:
mean of x
-1.526557e-16
studentized Breusch-Pagan test
data: m3
BP = 11.389, df = 13, p-value = 0.5783
Durbin-Watson test
data: m3
DW = 1.1633, p-value = 4.187e-05
alternative hypothesis: true autocorrelation is greater than 0